data.dir = 'https://mdporter.github.io/SYS6018/data/' # data directory
library(R6018) # functions for SYS-6018
library(tidyverse) # functions for data manipulation
library(mlbench)
library(glmnet)
library(glmnetUtils)
# focal_url = "http://localhost/Data/spectro_nn/focalPlane/Equal/EqEvt731/order7_ep5/combine.csv"
focal_url = "http://localhost/Data/spectro_nn/focalPlane/Equal/EqEvt731/order10_ep5/combine.csv"
dataset_focal = readr::read_csv(focal_url)
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> .default = col_double()
#> )
#> ℹ Use `spec()` for the full column specifications.
dataSet = dataset_focal %>% as.matrix()
# dataSet
trainSize = round(0.2*nrow(dataSet))
train = sample(nrow(dataSet),size = trainSize)
test = - train
trainSet = dataset_focal[train,]
train_X = trainSet %>% select(-evtID, -runID, -CutID,-SieveRowID,-SieveColID,-bpmX,-bpmY,-targCalTh,-targCalPh)
train_y_theta = trainSet %>% select(targCalTh)
train_y_phi = trainSet %>% select(targCalPh)
n.fold = 20
fold = sample(rep(1:n.fold, length=nrow(train_X)))
fit.theta = cv.glmnet(as.matrix(train_X),as.matrix(train_y_theta),foldid=fold)
fit.phi = cv.glmnet(as.matrix(train_X),as.matrix(train_y_phi),foldid = fold)
plot(fit.phi)
plot(fit.theta)
### 2. check how it behave on the training dataset
trainSetRes = trainSet %>% select(evtID, runID, CutID,SieveRowID,SieveColID,bpmX,bpmY,targCalTh,targCalPh)
train_ResX = trainSet %>% select(-evtID, -runID, -CutID,-SieveRowID,-SieveColID,-bpmX,-bpmY,-targCalTh,-targCalPh)
train_ResX_theta = trainSet %>% select(targCalTh)
train_ResX_phi = trainSet %>% select(targCalPh)
train.y.theta.pred = predict(fit.theta,newx = as.matrix(train_ResX),s="lambda.1se")
colnames(train.y.theta.pred) = c("pred.theta")
trainSetRes <- cbind(trainSetRes,pred.thet = train.y.theta.pred)
train.y.phi.pred = predict(fit.phi,newx = as.matrix(train_ResX),s="lambda.1se")
colnames(train.y.phi.pred) = c("pred.phi")
trainSetRes <- cbind(trainSetRes,pred.phi = train.y.phi.pred)
trainSetRes$resid.theta <- trainSetRes$targCalTh - trainSetRes$pred.theta
trainSetRes$resid.phi <- trainSetRes$targCalPh - trainSetRes$pred.phi
val = trainSetRes %>%
filter(.$runID == 2239)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 300) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2239))
val = trainSetRes %>%
filter(.$runID == 2240)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 300) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2240))
#> Warning: Removed 2 rows containing non-finite values (stat_bin2d).
val = trainSetRes %>%
filter(.$runID == 2241)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 300) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2241))
val = trainSetRes %>%
filter(.$runID == 2244)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 300) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2244))
#> Warning: Removed 2 rows containing non-finite values (stat_bin2d).
val = trainSetRes %>%
filter(.$runID == 2245)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 300) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2245))
#> Warning: Removed 2 rows containing non-finite values (stat_bin2d).
val = trainSetRes %>%
filter(.$runID == 2256)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 300) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2256))
val = trainSetRes %>%
filter(.$runID == 2257)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 300) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2257))
# make prediction on the theta and phi angles
testSet = dataset_focal[test,]
testSetRes = testSet %>% select(evtID, runID, CutID,SieveRowID,SieveColID,bpmX,bpmY,targCalTh,targCalPh)
test_X = testSet %>% select(-evtID, -runID, -CutID,-SieveRowID,-SieveColID,-bpmX,-bpmY,-targCalTh,-targCalPh)
test_y_theta = testSet %>% select(targCalTh)
test_y_phi = testSet %>% select(targCalPh)
test.y.theta.pred = predict(fit.theta,newx = as.matrix(test_X),s="lambda.1se")
colnames(test.y.theta.pred) = c("pred.theta")
testSetRes <- cbind(testSetRes,pred.thet = test.y.theta.pred)
test.y.phi.pred = predict(fit.phi,newx = as.matrix(test_X),s="lambda.1se")
colnames(test.y.phi.pred) = c("pred.phi")
testSetRes <- cbind(testSetRes,pred.phi = test.y.phi.pred)
testSetRes$resid.theta <- testSetRes$targCalTh - testSetRes$pred.theta
testSetRes$resid.phi <- testSetRes$targCalPh - testSetRes$pred.phi
# testSetRes
unique(testSetRes$runID) %>% print()
#> [1] 2239 2240 2241 2244 2245 2256 2257
val = testSetRes %>% filter(.$runID == 2256)
val
unique(testSetRes$runID)
#> [1] 2239 2240 2241 2244 2245 2256 2257
val = testSetRes %>%
filter(.$runID == 2239)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 500) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2239))
val = testSetRes %>%
filter(.$runID == 2240)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 500) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2240))
#> Warning: Removed 1 rows containing non-finite values (stat_bin2d).
val = testSetRes %>%
filter(.$runID == 2241)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 500) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2241))
#> Warning: Removed 1 rows containing non-finite values (stat_bin2d).
val = testSetRes %>%
filter(.$runID == 2244)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 500) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2244))
val = testSetRes %>%
filter(.$runID == 2245)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 500) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2245))
#> Warning: Removed 1 rows containing non-finite values (stat_bin2d).
val = testSetRes %>%
filter(.$runID == 2256)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 500) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2256))
val = testSetRes %>%
filter(.$runID == 2257)
ggplot(val) +geom_bin2d(aes(x=pred.phi,y=pred.theta,color = "blue"),bins = 500) + geom_point(aes(y=targCalTh,x=targCalPh,color="green")) +
xlim(-0.015,0.025) +
labs(title=sprintf("run_2d_%d",2257))
#> Warning: Removed 6 rows containing non-finite values (stat_bin2d).
#ggplot(val,aes(x=pred.phi,y=pred.theta,color="red")) + geom_point()
residalRes = tibble(runID = numeric(), cutID = numeric(),mean.resid.theta = numeric(),stde.resid.theta = numeric(),mean.resid.phi = numeric(),stde.resid.phi= numeric())
runID.list = unique(testSetRes$runID)
for (id in runID.list) {
test.res = testSetRes %>%
filter(.$runID == id)
cutid.list = unique(test.res$CutID)
for (cutid in cutid.list){
test.res.cutid = test.res %>%
filter(.$CutID == cutid)
mean.resid.theta = mean(test.res.cutid$resid.theta)
stde.resid.theta = sd(test.res.cutid$resid.theta)/sqrt(length(test.res.cutid$resid.theta))
mean.resid.phi = mean(test.res.cutid$resid.phi)
stde.resid.phi = sd(test.res.cutid$resid.phi)/sqrt(length(test.res.cutid$resid.phi))
residalRes = add_row(residalRes,tibble(runID=id,cutID = cutid, mean.resid.theta = mean.resid.theta,
stde.resid.theta=stde.resid.theta,
mean.resid.phi = mean.resid.phi,
stde.resid.phi = stde.resid.phi))
}
}
residalRes %>%
filter(.$runID == 2239) %>%
ggplot(aes(x=cutID,y=mean.resid.phi)) + geom_point() + ylim(-0.003,0.003) +
labs(title=sprintf("residual_%d",2239))
residalRes %>%
filter(.$runID == 2240) %>%
ggplot(aes(x=cutID,y=mean.resid.phi)) + geom_point() + ylim(-0.003,0.003) +
labs(title=sprintf("residual_%d",2240))
residalRes %>%
filter(.$runID == 2241) %>%
ggplot(aes(x=cutID,y=mean.resid.phi)) + geom_point() + ylim(-0.003,0.003) +
labs(title=sprintf("residual_%d",2241))
residalRes %>%
filter(.$runID == 2244) %>%
ggplot(aes(x=cutID,y=mean.resid.phi)) + geom_point() + ylim(-0.003,0.003) +
labs(title=sprintf("residual_%d",2244))
residalRes %>%
filter(.$runID == 2245) %>%
ggplot(aes(x=cutID,y=mean.resid.phi)) + geom_point() + ylim(-0.003,0.003) +
labs(title=sprintf("residual_%d",2245))
residalRes %>%
filter(.$runID == 2256) %>%
ggplot(aes(x=cutID,y=mean.resid.phi)) + geom_point() + ylim(-0.003,0.003) +
labs(title=sprintf("residual_%d",2256))
residalRes %>%
filter(.$runID == 2257) %>%
ggplot(aes(x=cutID,y=mean.resid.phi)) + geom_point() + ylim(-0.003,0.003) +
labs(title=sprintf("residual_%d",2257))
res = testSetRes %>%
filter(.$runID == 2239)
mid_phi_df <- res %>%
group_by(CutID) %>%
summarize(median=median(resid.phi))
ggplot(res, aes(x=resid.phi,color = "CutID")) + geom_density() +
geom_vline(data =mid_phi_df, aes(xintercept = median))+
facet_wrap(~CutID)
xlim(-0.002,0.002)
#> <ScaleContinuousPosition>
#> Range:
#> Limits: -0.002 -- 0.002